Logistic Regression

Dataset

A random n-class classification dataset can be generated using sklearn.datasets.make_classification. Here, we generate a dataset with two features and 2000 instances. Moreover, the dataset is generated for multiclass classification with three classes.

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from num2words import num2words

# Problem size: number of input features and number of target classes.
n_features = 2
n_classes = 3

# Reproducible synthetic classification data; the sample count scales with the class count.
X, y = make_classification(
    n_samples=int((n_classes - 1) * 1e3),
    n_features=n_features,
    n_informative=2,
    n_redundant=0,
    n_classes=n_classes,
    n_clusters_per_class=1,
    random_state=1,
)

# Map each class id found in y to its title-cased English word.
Labels_dict = {label: num2words(label).title() for label in np.unique(y)}

# Tabular view of the features with the class id appended as the last column.
Target = 'Outcome Variable'
Data = pd.DataFrame(data=X, columns=[f'Feature {i + 1}' for i in range(n_features)])
Data[Target] = y
display(Data)

from HD_DeepLearning import Plot_Data

# Styling options forwarded to the project plotting helper (keys are read by that helper).
PD = {
    'BP': .5,
    'alpha': .7,
    'bg_alpha': 0.25,
    'grid': True,
    'cricle_size': 50,
    'FigSize': 7,
    'h': 0.02,
    'pad': 1,
    'ColorMap': 'Set1',
    'Labels': list(Labels_dict.values()),
}

Plot_Data(X, y, PD=PD, Labels_dict=Labels_dict, ax=None)
Feature 1 Feature 2 Outcome Variable
0 0.421823 -1.258802 2
1 1.174360 1.586866 0
2 -0.444844 0.623748 2
3 1.286082 1.791197 0
4 1.050679 1.105048 0
... ... ... ...
1995 -1.929029 0.119340 2
1996 -0.010248 -0.785788 1
1997 1.796874 3.145459 0
1998 1.656980 2.623708 0
1999 0.820434 0.450676 0

2000 rows × 3 columns

Train and Test Sets

In [2]:
import plotly.express as px
from HD_DeepLearning import DatasetTargetDist

# One offset per class: 0.01 for all but the last class, 0.1 for the last one
# (presumably pie-slice pull distances used by DatasetTargetDist).
Pull = [.01] * (len(Labels_dict) - 1) + [.1]

# Styling options forwarded to the project plotting helper.
PD = {
    'PieColors': px.colors.sequential.Plasma_r,
    'TableColors': ['Navy', 'White'],
    'hole': .4,
    'row_heights': [0.35, 0.65],
    'textfont': 14,
    'height': 500,
    'tablecolumnwidth': [0.25, 0.15, 0.15],
    'pull': Pull,
    'legend_title': Target,
    'title_x': 0.5,
    'title_y': .9,
    'pie_legend': [0.1, 0.12],
}
# The list is only needed inside PD; drop the temporary name.
del Pull

DatasetTargetDist(Data, Target, Labels_dict, PD)

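StratifiedShuffleSplit is a variation of ShuffleSplit which returns stratified randomized splits: each train/test split contains approximately the same percentage of samples of each target class as the complete set. Here, a single split is drawn, with 30% of the samples held out as the test set.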

In [3]:
from sklearn.model_selection import StratifiedShuffleSplit


def _take_rows(data, positions):
    """Return the rows of ``data`` at the given integer positions.

    Works for pandas DataFrames/Series and for NumPy arrays.
    """
    if isinstance(data, (pd.DataFrame, pd.Series)):
        # The splitter yields positional indices, so positional (not label-based)
        # lookup is required; the two only coincide for a default RangeIndex.
        return data.iloc[positions]
    return data[positions]


# Fraction of the samples held out for testing.
Test_Size = 0.3

# A single stratified, reproducible train/test split.
sss = StratifiedShuffleSplit(n_splits=1, test_size=Test_Size, random_state=42)
train_index, test_index = next(sss.split(X, y))
X_train, X_test = _take_rows(X, train_index), _take_rows(X, test_index)
y_train, y_test = _take_rows(y, train_index), _take_rows(y, test_index)
del sss

from HD_DeepLearning import Train_Test_Dist
# Reuse the existing plot options, overriding only the layout-related entries.
PD.update(dict(column_widths=[0.3, 0.3, 0.3], tablecolumnwidth = [0.2, 0.4], height = 550, legend_title = Target))

Train_Test_Dist(X_train, y_train, X_test, y_test, PD, Labels_dict)

Modeling: PyTorch Multinomial Logistic Regression for Multi-Class Classification

Multinomial logistic regression is a classification method that generalizes logistic regression to multiclass problems.

In [4]:
import torch

def TorchSets(Set):
    """Convert a NumPy array or pandas object into a torch tensor.

    Parameters
    ----------
    Set : numpy.ndarray, pandas.DataFrame or pandas.Series
        Data to convert. pandas objects are converted through ``.values``.

    Returns
    -------
    torch.Tensor
        One-dimensional input is cast to a 64-bit integer tensor (it is treated
        as a label vector); input of any other rank keeps the dtype of the
        underlying array. The tensor is moved to the GPU when CUDA is available.
    """
    if isinstance(Set, (pd.DataFrame, pd.Series)):
        Set = Set.values

    # Plain tensors replace the deprecated torch.autograd.Variable wrapper.
    Out = torch.from_numpy(Set)
    if Set.ndim == 1:
        Out = Out.long()
    if torch.cuda.is_available():
        Out = Out.cuda()
    return Out

# Tensors
X_train_tensor = TorchSets(X_train)
y_train_tensor = TorchSets(y_train)
X_test_tensor = TorchSets(X_test)
y_test_tensor = TorchSets(y_test)

# Mini-batch size and the approximate number of optimizer updates to run.
Batch_size = 100
iteration_number = int(3e2)

# Whole passes over the training set that fit into iteration_number updates.
epochs_number = int(iteration_number / (len(X_train) / Batch_size))

# Pytorch train and test sets
Train_set = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
Test_set = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)

# data loader
train_loader = torch.utils.data.DataLoader(Train_set, batch_size = Batch_size, shuffle = False)
# The evaluation loader must iterate over the held-out split; building it from
# Train_set would report training accuracy as test accuracy.
test_loader = torch.utils.data.DataLoader(Test_set, batch_size = Batch_size, shuffle = False)
In [5]:
class LogisticRegressionModel(torch.nn.Module):
    """Multinomial logistic regression expressed as a single linear layer.

    The forward pass returns the raw output of the linear layer; no softmax is
    applied inside the model.
    """

    def __init__(self, input_Size, output_Size):
        """Create the linear layer mapping ``input_Size`` inputs to ``output_Size`` outputs."""
        super().__init__()
        self.linear = torch.nn.Linear(input_Size, output_Size)

    def forward(self, x):
        """Return the linear layer's output for the batch ``x``."""
        return self.linear(x)

Fitting the model

In [6]:
input_Size, output_Size = n_features, len(Labels_dict)
# NOTE: not used by the single-layer model below; kept so the notebook namespace is unchanged.
hidden_Size = 256

# model
model = LogisticRegressionModel(input_Size, output_Size)

# GPU
if torch.cuda.is_available():
    model.cuda()

# Cross Entropy Loss
criterion = torch.nn.CrossEntropyLoss()

# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=.9)

# Training the Model
Count = 0              # number of optimizer updates performed so far
Loss_list = []
Iteration_list = []
Accuracy_list = []
MSE_list = []          # not filled in this cell
MAE_list = []          # not filled in this cell
Steps = 10             # evaluate on test_loader every `Steps` updates

import progressbar
# Scale the bar to the number of updates that will actually run, so it fills
# progressively instead of jumping to 100% when finish() is called.
Progress_Bar = progressbar.ProgressBar(maxval=epochs_number * len(train_loader),
                                       widgets=[progressbar.Bar('=', '|', '|'),
                                                progressbar.Percentage()])

for epoch in range(epochs_number):
    for Xtr, ytr in train_loader:

        # Plain tensors are used directly (torch.autograd.Variable is deprecated)
        Xtr = Xtr.view(-1, n_features)

        # Set all gradients to zero
        optimizer.zero_grad()

        # Forward
        Out = model(Xtr.float())

        # loss
        loss = criterion(Out, ytr.long())

        # Backward (Calculating the gradients)
        loss.backward()

        # Update parameters
        optimizer.step()

        Count += 1

        del Xtr, ytr

        # Periodic evaluation on test_loader
        if Count % Steps == 0:
            Correct, Total = 0, 0
            # No gradients are needed while scoring, so skip building the graph
            with torch.no_grad():
                for Xts, yts in test_loader:
                    Xts = Xts.view(-1, n_features)

                    # Forward
                    Out = model(Xts.float())

                    # Index of the largest score in each row is the predicted class
                    Predicted = torch.max(Out, 1)[1]

                    # Total number of yts
                    Total += len(yts)

                    # Total Correct predictions
                    Correct += (Predicted == yts).sum()
            del Xts, yts
            # Store the loss of the latest training batch, the update count and the accuracy
            Loss_list.append(loss.detach())
            Iteration_list.append(Count)
            Accuracy_list.append(Correct / float(Total))

        Progress_Bar.update(Count)

Progress_Bar.finish()

# Collect the recorded curves into one table (tensors are moved to the CPU first)
history = pd.DataFrame({'Iteration': np.array(Iteration_list),
                        'Loss': np.array([x.cpu().numpy() for x in Loss_list]),
                        'Accuracy': np.array([x.cpu().numpy() for x in Accuracy_list])})
del Loss_list, Iteration_list, Accuracy_list
|=========================================================================|100%

Model Performance

In [7]:
from HD_DeepLearning import Plot_history

# Layout and styling options forwarded to the project plotting helper.
PD = {
    'row_heights': [0.4, 0.6],
    'lw': 1.5,
    'font_size': 12,
    'height': 700,
    'yLim': 1.5,
    'th_line_color': 'Navy',
    'th_fill_color': 'darkslategray',
    'table_columnwidth': [0.4, 0.4, 0.4, 0.4],
    'tc_line_color': 'Navy',
    'tc_fill_color': None,
    'title_x': 0.46,
    'title_y': 0.92,
    'tb_cell_heigh': 20,
    'Number_Format': '%.4e',
}

# Plot the loss/accuracy curves recorded during training.
Plot_history(history, PD, Title='Test Set', Colors=['DarkGreen', 'Red'])
In [8]:
from HD_DeepLearning import Plot_Classification_Torch
import matplotlib.pyplot as plt

# Styling options forwarded to the project plotting helper (keys are read by that helper).
PD = {
    'BP': .5,
    'alpha': .7,
    'bg_alpha': 0.15,
    'grid': False,
    'cricle_size': 50,
    'FigSize': 7,
    'h': 0.02,
    'pad': 1,
    'ColorMap': 'Set1',
    'Labels': list(Labels_dict.values()),
}

# Two side-by-side panels: train set on the left, test set on the right.
fig, ax = plt.subplots(1, 2, figsize=(16, 7))
for axis, (features, labels, title) in zip(ax, [(X_train, y_train, 'Train Set'),
                                                (X_test, y_test, 'Test Set')]):
    Plot_Classification_Torch(model, features, labels, PD=PD, ax=axis)
    _ = axis.set_title(title, fontsize=16, weight='bold')

Confusion Matrix

The confusion matrix allows for visualization of the performance of an algorithm. Note that due to the size of data, here we don't provide a Cross-validation evaluation. In general, this type of evaluation is preferred.

In [9]:
from sklearn import metrics


def _predict_labels(model, X_tensor):
    """Return the predicted class index per row of ``X_tensor`` as a NumPy array."""
    # Inference only: no gradients are needed.
    with torch.no_grad():
        scores = model(X_tensor.float())
    # The index of the largest score in each row is the predicted class.
    return torch.max(scores, 1)[1].cpu().numpy()


def _classification_summary(y_true, y_pred, set_name, label_names):
    """Return ``(report, confusion_matrix)`` for one data split.

    ``report`` is the transposed classification report as a DataFrame whose
    first column is named ``set_name`` and holds the row labels.
    """
    report = pd.DataFrame(metrics.classification_report(y_true, y_pred, target_names=label_names,
                                                        output_dict=True)).T
    report = report.reset_index().rename(columns={'index': set_name})
    return report, metrics.confusion_matrix(y_true, y_pred)


# Train
y_pred = _predict_labels(model, X_train_tensor)
Reports_Train, CM_Train = _classification_summary(y_train, y_pred, 'Train Set', list(Labels_dict.values()))
# Test
y_pred = _predict_labels(model, X_test_tensor)
Reports_Test, CM_Test = _classification_summary(y_test, y_pred, 'Test Set', list(Labels_dict.values()))

# Styled report tables: one colour scheme per split, with the label column highlighted.
display(Reports_Train.style.hide(axis='index').set_properties(**{'background-color': 'HoneyDew', 'color': 'Black'}).\
        set_properties(subset=['Train Set'], **{'background-color': 'SeaGreen', 'color': 'White'}))
display(Reports_Test.style.hide(axis='index').set_properties(**{'background-color': 'Azure', 'color': 'Black'}).\
        set_properties(subset=['Test Set'], **{'background-color': 'RoyalBlue', 'color': 'White'}))

from HD_DeepLearning import Confusion_Mat
PD = dict(FS = (14, 6), annot_kws = 14, shrink = .6, Labels = list(Labels_dict.values()))
Confusion_Mat(CM_Train, CM_Test, PD = PD, n_splits = None)
Train Set precision recall f1-score support
Zero 0.907816 0.970021 0.937888 467.000000
One 0.846154 0.795745 0.820175 470.000000
Two 0.769063 0.762419 0.765727 463.000000
accuracy 0.842857 0.842857 0.842857 0.842857
macro avg 0.841011 0.842728 0.841263 1400.000000
weighted avg 0.841227 0.842857 0.841434 1400.000000
Test Set precision recall f1-score support
Zero 0.913876 0.955000 0.933985 200.000000
One 0.862434 0.810945 0.835897 201.000000
Two 0.772277 0.783920 0.778055 199.000000
accuracy 0.850000 0.850000 0.850000 0.850000
macro avg 0.849529 0.849955 0.849313 600.000000
weighted avg 0.849679 0.850000 0.849409 600.000000

References

  1. Stathakis, D. (2009). How many hidden layers and nodes?. International Journal of Remote Sensing, 30(8), 2133-2147.
  2. Artificial neural network. Retrieved June 02, 2020, from https://en.wikipedia.org/wiki/Artificial_neural_network.